import csv
import os
import subprocess
import sys
from itertools import groupby
from operator import itemgetter
from random import shuffle

# Location of the Weka jar that provides the classes invoked below.
WEKA_JAR = "/home/moos/software/weka-3-6-7/weka.jar"
# File that Weka writes its result to and that this script reads back.
CLUSTERED_CSV = "out.csv"


def weka_environment():
    """Return a copy of the environment whose CLASSPATH includes the Weka jar.

    Any CLASSPATH already set by the caller is kept and searched first; the
    Weka jar is appended to it.
    """
    env = dict(os.environ)
    entries = [entry for entry in (env.get("CLASSPATH"), WEKA_JAR) if entry]
    env["CLASSPATH"] = os.pathsep.join(entries)
    return env


def run_weka(data, output=CLUSTERED_CSV):
    """Run Weka's AddCluster filter (EM clusterer) on *data*, then CSVSaver.

    Both steps write to *output*; CSVSaver rewrites the file in place.

    Raises:
        subprocess.CalledProcessError: if either java invocation exits with a
            non-zero status.
        OSError: if the ``java`` executable cannot be started.
    """
    # An "export" issued through os.system() only affects that one subshell,
    # so the classpath is handed to each child process explicitly instead.
    env = weka_environment()
    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    subprocess.check_call(
        ["java", "weka.filters.unsupervised.attribute.AddCluster",
         "-W", "weka.clusterers.EM", "-I", "1,2",
         "-i", data, "-o", output],
        env=env)
    subprocess.check_call(
        ["java", "weka.core.converters.CSVSaver", "-i", output, "-o", output],
        env=env)


def read_rows(path=CLUSTERED_CSV):
    """Read every row of the CSV file at *path* and return them as lists.

    Fields are comma-separated and quoted with single quotes.
    """
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(path, newline="")
    else:
        handle = open(path, "rb")
    with handle:
        return list(csv.reader(handle, delimiter=",", quotechar="'"))


def cluster_strings(rows):
    """Group *rows* by their last column and build one string per group.

    Rows are ordered by the value of their last column (a stable sort). For
    each distinct value, the second column of every row in that group is
    collected, shuffled, and concatenated. Empty rows (as produced by blank
    CSV lines) are skipped.

    Returns:
        A list of strings, one per group, in ascending order of the group's
        last-column value. An empty input yields an empty list.

    Raises:
        IndexError: if a non-empty row has fewer than two columns.
    """
    # NOTE(review): if the CSV carries a header line, it is grouped like any
    # other row, exactly as before -- confirm whether it should be skipped.
    last_column = itemgetter(-1)
    ordered = sorted((row for row in rows if row), key=last_column)
    result = []
    for _, members in groupby(ordered, key=last_column):
        symbols = [row[1] for row in members]
        # Every group is shuffled, including the final one.
        shuffle(symbols)
        result.append("".join(symbols))
    return result


def main(argv=None):
    """Cluster the data file named in *argv* and print each cluster.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the input file passed to
    Weka. For every cluster the line ``CLUSTER`` is printed, followed by a
    line holding that cluster's concatenated second-column values.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else "script"
        sys.exit("usage: {0} <data-file>".format(program))

    run_weka(argv[1])
    for members in cluster_strings(read_rows()):
        print("CLUSTER")
        print(members)


if __name__ == "__main__":
    main()
